# from dataclasses import dataclass
# import torch
# import torch.nn as nn

# class Block(nn.Module):



# class GPTConfig:
#     block_size:int=256
# class GPT(nn.Module):
#     def __init__(self,config):


#         self.transformer=nn.ModuleDict(dict())
